bitkeeper revision 1.498.1.2 (3f870808Dvsdzic2p7uQgZ0pTQzx7w)
authoriap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>
Fri, 10 Oct 2003 19:27:04 +0000 (19:27 +0000)
committeriap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>
Fri, 10 Oct 2003 19:27:04 +0000 (19:27 +0000)
add a handy tool for reading the P4 performance counters: xen_perfctr

.rootkeys
tools/misc/Makefile
tools/misc/p4perf.h [new file with mode: 0644]
tools/misc/xen_cpuperf.c [new file with mode: 0644]

index f6203c2848b08ec44c690decc5bae733bd6d997f..48b0bfefb6516751518711d89721208e27914b21 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 3f6dc136ZKOjd8PIqLbFBl_v-rnkGg tools/misc/miniterm/Makefile
 3f6dc140C8tAeBfroAF24VrmCS4v_w tools/misc/miniterm/README
 3f6dc142IHaf6XIcAYGmhV9nNSIHFQ tools/misc/miniterm/miniterm.c
+3f870808_8aFBAcZbWiWGdgrGQyIEw tools/misc/p4perf.h
 3f5ef5a2ir1kVAthS14Dc5QIRCEFWg tools/misc/xen-clone
 3f5ef5a2dTZP0nnsFoeq2jRf3mWDDg tools/misc/xen-clone.README
 3f1668d4-FUY6Enc7MB3GcwUtfJ5HA tools/misc/xen-mkdevnodes
+3f870808zS6T6iFhqYPGelroZlVfGQ tools/misc/xen_cpuperf.c
 3f13d81eQ9Vz-h-6RDGFkNR9CRP95g tools/misc/xen_nat_enable
 3f13d81e6Z6806ihYYUw8GVKNkYnuw tools/misc/xen_nat_enable.README
 3f1668d4F29Jsw0aC0bJEIkOBiagiQ tools/misc/xen_read_console.c
index 62e1ab7fee8daa69ed89c4e5736f9846c97792ee..59899f5f530610c116681aa247bf0e92baa0b863 100644 (file)
@@ -1,13 +1,13 @@
 
 CC       = gcc
 CFLAGS   = -Wall -O3 
-CFLAGS  += -I../../xen/include -I../../xenolinux-sparse/include
+CFLAGS  += -I../../xen/include -I../../xenolinux-sparse/include -I../internal
 
 HDRS     = $(wildcard *.h)
 SRCS     = $(wildcard *.c)
 OBJS     = $(patsubst %.c,%.o,$(SRCS))
 
-TARGETS  = xen_read_console 
+TARGETS  = xen_read_console xen_cpuperf
 
 INSTALL  = $(TARGETS) xen-mkdevnodes xen-clone
 
diff --git a/tools/misc/p4perf.h b/tools/misc/p4perf.h
new file mode 100644 (file)
index 0000000..4f681b6
--- /dev/null
@@ -0,0 +1,559 @@
+/*
+ * For P6 use PERFCTR1 (0 used for APIC NMI watchdog). Must setup after
+ * APIC NMI watchdog setup. Note that if this previous setup doesn't happen
+ * we still must enable both counters.
+ *
+ * P4 Xeon with Hyperthreading has counters per physical package which can
+ * count events from either logical CPU. However, in many cases more than
+ * one ESCR and CCCR/counter can be used to count the same event. For instr
+ * or uops retired, use either ESCR0/IQ_CCCR0 or ESCR1/IQ_CCCR2.
+ *
+ * USE CONFIG_MPENTIUM4_HT for a P4 Xeon with hyperthreading.
+ *
+ * Note that the counters may be initialised on each logical processor
+ * which will cause each physical processor to be initialised twice. This
+ * should not cause a problem.
+ */
+
+#ifndef P4PERF_H
+#define P4PERF_H
+
+#ifdef __KERNEL__
+#include <asm/msr.h>
+#endif
+
+/*****************************************************************************
+ * Performance counter configuration.                                        *
+ *****************************************************************************/
+
+#ifndef P6_EVNTSEL_OS
+# define P6_EVNTSEL_OS     (1 << 17)
+# define P6_EVNTSEL_USR    (1 << 16)
+# define P6_EVNTSEL_E      (1 << 18)
+# define P6_EVNTSEL_EN     (1 << 22)
+#endif
+#define P6_PERF_INST_RETIRED 0xc0
+#define P6_PERF_UOPS_RETIRED 0xc2
+
+/* Bit fields that are OR-ed together to build an ESCR value.  The T0/T1
+ * variants carry the user/OS bits for the first and second logical CPU;
+ * the plain USR/OS names have the same values as the T0 bits.  Arguments
+ * of the function-like macros are parenthesised so that an expression
+ * argument such as P4_ESCR_EVNTMASK(a | b) is shifted as a whole.
+ */
+#define P4_ESCR_USR                    (1 << 2)
+#define P4_ESCR_OS                     (1 << 3)
+#define P4_ESCR_T0_USR                 (1 << 2) /* First logical CPU  */
+#define P4_ESCR_T0_OS                  (1 << 3)
+#define P4_ESCR_T1_USR                 (1 << 0) /* Second logical CPU */
+#define P4_ESCR_T1_OS                  (1 << 1)
+#define P4_ESCR_TE                     (1 << 4)
+#define P4_ESCR_THREADS(t)             (t)
+#define P4_ESCR_TV(tag)                ((tag) << 5)
+#define P4_ESCR_EVNTSEL(e)             ((e) << 25)
+#define P4_ESCR_EVNTMASK(e)            ((e) << 9)
+
+#define P4_ESCR_EVNTSEL_FRONT_END      0x08
+#define P4_ESCR_EVNTSEL_EXECUTION      0x0c
+#define P4_ESCR_EVNTSEL_REPLAY         0x09
+#define P4_ESCR_EVNTSEL_INSTR_RETIRED  0x02
+#define P4_ESCR_EVNTSEL_UOPS_RETIRED   0x01
+#define P4_ESCR_EVNTSEL_UOP_TYPE       0x02
+#define P4_ESCR_EVNTSEL_RET_MBR_TYPE   0x05
+//#define P4_ESCR_EVNTSEL_RET_MBR_TYPE   0x04
+
+#define P4_ESCR_EVNTMASK_FE_NBOGUS     0x01
+#define P4_ESCR_EVNTMASK_FE_BOGUS      0x02
+
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS0  0x01
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS1  0x02
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS2  0x04
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS3  0x08
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS0   0x10
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS1   0x20
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS2   0x40
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS3   0x80
+
+#define P4_ESCR_EVNTMASK_REPLAY_NBOGUS 0x01
+#define P4_ESCR_EVNTMASK_REPLAY_BOGUS  0x02
+
+#define P4_ESCR_EVNTMASK_IRET_NB_NTAG  0x01
+#define P4_ESCR_EVNTMASK_IRET_NB_TAG   0x02
+#define P4_ESCR_EVNTMASK_IRET_B_NTAG   0x04
+#define P4_ESCR_EVNTMASK_IRET_B_TAG    0x08
+
+#define P4_ESCR_EVNTMASK_URET_NBOGUS   0x01
+#define P4_ESCR_EVNTMASK_URET_BOGUS    0x02
+
+#define P4_ESCR_EVNTMASK_UOP_LOADS     0x02
+#define P4_ESCR_EVNTMASK_UOP_STORES    0x04
+
+#define P4_ESCR_EVNTMASK_RMBRT_COND    0x02
+#define P4_ESCR_EVNTMASK_RMBRT_CALL    0x04
+#define P4_ESCR_EVNTMASK_RMBRT_RETURN  0x08
+#define P4_ESCR_EVNTMASK_RMBRT_INDIR   0x10
+
+#define P4_ESCR_EVNTMASK_RBRT_COND     0x02
+#define P4_ESCR_EVNTMASK_RBRT_CALL     0x04
+#define P4_ESCR_EVNTMASK_RBRT_RETURN   0x08
+#define P4_ESCR_EVNTMASK_RBRT_INDIR    0x10
+
+//#define P4_ESCR_EVNTMASK_INSTR_RETIRED 0x01  /* Non bogus, not tagged */
+//#define P4_ESCR_EVNTMASK_UOPS_RETIRED  0x01  /* Non bogus             */
+
+/* Bit fields that are OR-ed together to build a CCCR value.  Arguments of
+ * the function-like macros are parenthesised so that expression arguments
+ * are shifted as a whole.  P4_CCCR_OVF uses an unsigned constant because
+ * shifting a signed 1 into bit 31 is undefined in C.
+ */
+#define P4_CCCR_OVF                    (1U << 31)
+#define P4_CCCR_CASCADE                (1 << 30)
+#define P4_CCCR_FORCE_OVF              (1 << 25)
+#define P4_CCCR_EDGE                   (1 << 24)
+#define P4_CCCR_COMPLEMENT             (1 << 19)
+#define P4_CCCR_COMPARE                (1 << 18)
+#define P4_CCCR_THRESHOLD(t)           ((t) << 20)
+#define P4_CCCR_ENABLE                 (1 << 12)
+#define P4_CCCR_ESCR(escr)             ((escr) << 13)
+#define P4_CCCR_ACTIVE_THREAD(t)       ((t) << 16)   /* Set to 11 */
+#define P4_CCCR_OVF_PMI_T0             (1 << 26)
+#define P4_CCCR_OVF_PMI_T1             (1 << 27)
+#define P4_CCCR_RESERVED               (3 << 16)
+#define P4_CCCR_OVF_PMI                (1 << 26)
+
+// BPU
+#define MSR_P4_BPU_COUNTER0            0x300
+#define MSR_P4_BPU_COUNTER1            0x301
+#define MSR_P4_BPU_CCCR0               0x360
+#define MSR_P4_BPU_CCCR1               0x361
+
+#define MSR_P4_BPU_COUNTER2            0x302
+#define MSR_P4_BPU_COUNTER3            0x303
+#define MSR_P4_BPU_CCCR2               0x362
+#define MSR_P4_BPU_CCCR3               0x363
+
+#define MSR_P4_BSU_ESCR0               0x3a0
+#define MSR_P4_FSB_ESCR0               0x3a2
+#define MSR_P4_MOB_ESCR0               0x3aa
+#define MSR_P4_PMH_ESCR0               0x3ac
+#define MSR_P4_BPU_ESCR0               0x3b2
+#define MSR_P4_IS_ESCR0                0x3b4
+#define MSR_P4_ITLB_ESCR0              0x3b6
+#define MSR_P4_IX_ESCR0                0x3c8
+
+#define P4_BSU_ESCR0_NUMBER            7
+#define P4_FSB_ESCR0_NUMBER            6
+#define P4_MOB_ESCR0_NUMBER            2
+#define P4_PMH_ESCR0_NUMBER            4
+#define P4_BPU_ESCR0_NUMBER            0
+#define P4_IS_ESCR0_NUMBER             1
+#define P4_ITLB_ESCR0_NUMBER           3
+#define P4_IX_ESCR0_NUMBER             5
+
+#define MSR_P4_BSU_ESCR1               0x3a1
+#define MSR_P4_FSB_ESCR1               0x3a3
+#define MSR_P4_MOB_ESCR1               0x3ab
+#define MSR_P4_PMH_ESCR1               0x3ad
+#define MSR_P4_BPU_ESCR1               0x3b3
+#define MSR_P4_IS_ESCR1                0x3b5
+#define MSR_P4_ITLB_ESCR1              0x3b7
+#define MSR_P4_IX_ESCR1                0x3c9
+
+#define P4_BSU_ESCR1_NUMBER            7
+#define P4_FSB_ESCR1_NUMBER            6
+#define P4_MOB_ESCR1_NUMBER            2
+#define P4_PMH_ESCR1_NUMBER            4
+#define P4_BPU_ESCR1_NUMBER            0
+#define P4_IS_ESCR1_NUMBER             1
+#define P4_ITLB_ESCR1_NUMBER           3
+#define P4_IX_ESCR1_NUMBER             5
+
+// MS
+#define MSR_P4_MS_COUNTER0             0x304
+#define MSR_P4_MS_COUNTER1             0x305
+#define MSR_P4_MS_CCCR0                0x364
+#define MSR_P4_MS_CCCR1                0x365
+
+#define MSR_P4_MS_COUNTER2             0x306
+#define MSR_P4_MS_COUNTER3             0x307
+#define MSR_P4_MS_CCCR2                0x366
+#define MSR_P4_MS_CCCR3                0x367
+
+#define MSR_P4_MS_ESCR0                0x3c0
+#define MSR_P4_TBPU_ESCR0              0x3c2
+#define MSR_P4_TC_ESCR0                0x3c4
+
+#define P4_MS_ESCR0_NUMBER             0
+#define P4_TBPU_ESCR0_NUMBER           2
+#define P4_TC_ESCR0_NUMBER             1
+
+#define MSR_P4_MS_ESCR1                0x3c1
+#define MSR_P4_TBPU_ESCR1              0x3c3
+#define MSR_P4_TC_ESCR1                0x3c5
+
+#define P4_MS_ESCR1_NUMBER             0
+#define P4_TBPU_ESCR1_NUMBER           2
+#define P4_TC_ESCR1_NUMBER             1
+
+// FLAME
+#define MSR_P4_FLAME_COUNTER0          0x308
+#define MSR_P4_FLAME_COUNTER1          0x309
+#define MSR_P4_FLAME_CCCR0             0x368
+#define MSR_P4_FLAME_CCCR1             0x369
+
+#define MSR_P4_FLAME_COUNTER2          0x30a
+#define MSR_P4_FLAME_COUNTER3          0x30b
+#define MSR_P4_FLAME_CCCR2             0x36a
+#define MSR_P4_FLAME_CCCR3             0x36b
+
+#define MSR_P4_FIRM_ESCR0              0x3a4
+#define MSR_P4_FLAME_ESCR0             0x3a6
+#define MSR_P4_DAC_ESCR0               0x3a8
+#define MSR_P4_SAAT_ESCR0              0x3ae
+#define MSR_P4_U2L_ESCR0               0x3b0
+
+#define P4_FIRM_ESCR0_NUMBER           1
+#define P4_FLAME_ESCR0_NUMBER          0
+#define P4_DAC_ESCR0_NUMBER            5
+#define P4_SAAT_ESCR0_NUMBER           2
+#define P4_U2L_ESCR0_NUMBER            3
+
+#define MSR_P4_FIRM_ESCR1              0x3a5
+#define MSR_P4_FLAME_ESCR1             0x3a7
+#define MSR_P4_DAC_ESCR1               0x3a9
+#define MSR_P4_SAAT_ESCR1              0x3af
+#define MSR_P4_U2L_ESCR1               0x3b1
+
+#define P4_FIRM_ESCR1_NUMBER           1
+#define P4_FLAME_ESCR1_NUMBER          0
+#define P4_DAC_ESCR1_NUMBER            5
+#define P4_SAAT_ESCR1_NUMBER           2
+#define P4_U2L_ESCR1_NUMBER            3
+
+// IQ
+#define MSR_P4_IQ_COUNTER0             0x30c
+#define MSR_P4_IQ_COUNTER1             0x30d
+#define MSR_P4_IQ_CCCR0                0x36c
+#define MSR_P4_IQ_CCCR1                0x36d
+
+#define MSR_P4_IQ_COUNTER2             0x30e
+#define MSR_P4_IQ_COUNTER3             0x30f
+#define MSR_P4_IQ_CCCR2                0x36e
+#define MSR_P4_IQ_CCCR3                0x36f
+
+#define MSR_P4_IQ_COUNTER4             0x310
+#define MSR_P4_IQ_COUNTER5             0x311
+#define MSR_P4_IQ_CCCR4                0x370
+#define MSR_P4_IQ_CCCR5                0x371
+
+#define MSR_P4_CRU_ESCR0               0x3b8
+#define MSR_P4_CRU_ESCR2               0x3cc
+#define MSR_P4_CRU_ESCR4               0x3e0
+#define MSR_P4_IQ_ESCR0                0x3ba
+#define MSR_P4_RAT_ESCR0               0x3bc
+#define MSR_P4_SSU_ESCR0               0x3be
+#define MSR_P4_ALF_ESCR0               0x3ca
+
+#define P4_CRU_ESCR0_NUMBER            4
+#define P4_CRU_ESCR2_NUMBER            5
+#define P4_CRU_ESCR4_NUMBER            6
+#define P4_IQ_ESCR0_NUMBER             0
+#define P4_RAT_ESCR0_NUMBER            2
+#define P4_SSU_ESCR0_NUMBER            3
+#define P4_ALF_ESCR0_NUMBER            1
+
+#define MSR_P4_CRU_ESCR1               0x3b9
+#define MSR_P4_CRU_ESCR3               0x3cd
+#define MSR_P4_CRU_ESCR5               0x3e1
+#define MSR_P4_IQ_ESCR1                0x3bb
+#define MSR_P4_RAT_ESCR1               0x3bd
+#define MSR_P4_ALF_ESCR1               0x3cb
+
+#define P4_CRU_ESCR1_NUMBER            4
+#define P4_CRU_ESCR3_NUMBER            5
+#define P4_CRU_ESCR5_NUMBER            6
+#define P4_IQ_ESCR1_NUMBER             0
+#define P4_RAT_ESCR1_NUMBER            2
+#define P4_ALF_ESCR1_NUMBER            1
+
+#define P4_BPU_COUNTER0_NUMBER         0
+#define P4_BPU_COUNTER1_NUMBER         1
+#define P4_BPU_COUNTER2_NUMBER         2
+#define P4_BPU_COUNTER3_NUMBER         3
+
+#define P4_MS_COUNTER0_NUMBER          4
+#define P4_MS_COUNTER1_NUMBER          5
+#define P4_MS_COUNTER2_NUMBER          6
+#define P4_MS_COUNTER3_NUMBER          7
+
+#define P4_FLAME_COUNTER0_NUMBER       8
+#define P4_FLAME_COUNTER1_NUMBER       9
+#define P4_FLAME_COUNTER2_NUMBER       10
+#define P4_FLAME_COUNTER3_NUMBER       11
+
+#define P4_IQ_COUNTER0_NUMBER          12
+#define P4_IQ_COUNTER1_NUMBER          13
+#define P4_IQ_COUNTER2_NUMBER          14
+#define P4_IQ_COUNTER3_NUMBER          15
+#define P4_IQ_COUNTER4_NUMBER          16
+#define P4_IQ_COUNTER5_NUMBER          17
+
+/* PEBS
+ */
+#define MSR_P4_PEBS_ENABLE             0x3F1
+#define MSR_P4_PEBS_MATRIX_VERT        0x3F2
+
+#define P4_PEBS_ENABLE_MY_THR          (1 << 25)
+#define P4_PEBS_ENABLE_OTH_THR         (1 << 26)
+#define P4_PEBS_ENABLE                 (1 << 24)
+#define P4_PEBS_BIT0                   (1 << 0)
+#define P4_PEBS_BIT1                   (1 << 1)
+#define P4_PEBS_BIT2                   (1 << 2)
+
+#define P4_PEBS_MATRIX_VERT_BIT0       (1 << 0)
+#define P4_PEBS_MATRIX_VERT_BIT1       (1 << 1)
+#define P4_PEBS_MATRIX_VERT_BIT2       (1 << 2)
+
+/* Replay tagging.
+ *
+ * Each name pairs a P4_REPLAY_TAGGING_PEBS_* value with the
+ * P4_REPLAY_TAGGING_VERT_* value of the same suffix.  The DTAMR vertical
+ * value combines two bits and is parenthesised so that it stays a single
+ * operand when used inside a larger expression.
+ */
+#define P4_REPLAY_TAGGING_PEBS_L1LMR   P4_PEBS_BIT0
+#define P4_REPLAY_TAGGING_PEBS_L2LMR   P4_PEBS_BIT1
+#define P4_REPLAY_TAGGING_PEBS_DTLMR   P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTSMR   P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTAMR   P4_PEBS_BIT2
+
+#define P4_REPLAY_TAGGING_VERT_L1LMR   P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_L2LMR   P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTLMR   P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTSMR   P4_PEBS_MATRIX_VERT_BIT1
+#define P4_REPLAY_TAGGING_VERT_DTAMR   (P4_PEBS_MATRIX_VERT_BIT0 | P4_PEBS_MATRIX_VERT_BIT1)
+
+
+
+
+/*****************************************************************************
+ *                                                                           *
+ *****************************************************************************/
+
+// x87_FP_uop
+#define EVENT_SEL_x87_FP_uop                0x04
+#define EVENT_MASK_x87_FP_uop_ALL           (1 << 15)
+
+// execution event (at retirement)
+#define EVENT_SEL_execution_event           0x0C
+
+// scalar_SP_uop
+#define EVENT_SEL_scalar_SP_uop             0x0a
+#define EVENT_MASK_scalar_SP_uop_ALL        (1 << 15)
+
+// scalar_DP_uop
+#define EVENT_SEL_scalar_DP_uop             0x0e
+#define EVENT_MASK_scalar_DP_uop_ALL        (1 << 15)
+
+// Instruction retired
+#define EVENT_SEL_instr_retired             0x02
+#define EVENT_MASK_instr_retired_ALL        0x0f
+
+// uOps retired
+#define EVENT_SEL_uops_retired              0x01
+#define EVENT_MASK_uops_retired_ALL         0x03
+
+// L1 misses retired
+#define EVENT_SEL_replay_event              0x09
+#define EVENT_MASK_replay_event_ALL         0x03
+
+// Trace cache
+#define EVENT_SEL_BPU_fetch_request         0x03
+#define EVENT_MASK_BPU_fetch_request_TCMISS 0x01
+
+// Bus activity
+#define EVENT_SEL_FSB_data_activity               0x17
+#define EVENT_MASK_FSB_data_activity_DRDY_DRV     0x01
+#define EVENT_MASK_FSB_data_activity_DRDY_OWN     0x02
+#define EVENT_MASK_FSB_data_activity_DRDY_OOTHER  0x04
+#define EVENT_MASK_FSB_data_activity_DBSY_DRV     0x08
+#define EVENT_MASK_FSB_data_activity_DBSY_OWN     0x10
+#define EVENT_MASK_FSB_data_activity_DBSY_OOTHER  0x20
+
+// Cache L2
+#define EVENT_SEL_BSQ_cache_reference             0x0c
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITS 0x001
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITE 0x002
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITM 0x004
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITS 0x008
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITE 0x010
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITM 0x020
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_MISS 0x100
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_MISS 0x200
+#define EVENT_MASK_BSQ_cache_reference_WR_L2_MISS 0x400
+
+/*****************************************************************************
+ *                                                                           *
+ *****************************************************************************/
+
+
+/* The following turn configuration macros into 1/0 to allow code to be
+ * selected using if(MPENTIUM4_HT) rather than #ifdef (to avoid stale code).
+ * We rely on the compiler to optimise out unreachable code.
+ */
+#ifdef CONFIG_MPENTIUM4_HT
+# define MPENTIUM4_HT 1
+#else
+# define MPENTIUM4_HT 0
+#endif
+
+#ifdef CONFIG_MPENTIUMIII
+# define MPENTIUMIII 1
+#else
+# define MPENTIUMIII 0
+#endif
+
+#ifdef CONFIG_MPENTIUM4
+# define MPENTIUM4 1
+#else
+# define MPENTIUM4 0
+#endif
+
+/*****************************************************************************
+ * MSR access macros                                                         *
+ *****************************************************************************/
+
+/* rpcc: execute rdtsc and return the result as one 64-bit value, built
+ * from the high word (EDX) and the low word (EAX).
+ */
+static __inline__ unsigned long long int rpcc(void) 
+{
+    unsigned int lo, hi;
+
+    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+    return ((unsigned long long)hi << 32) | lo;
+}
+
+/*****************************************************************************
+ * Functions.                                                                *
+ *****************************************************************************/
+
+#ifdef __KERNEL__
+/* Program the performance-counter MSRs used for the IPC samples.
+ *
+ * Which branches run is decided by the MPENTIUMIII / MPENTIUM4 /
+ * MPENTIUM4_HT constants, which are 1 or 0 depending on the matching
+ * CONFIG_* macro:
+ *  - MPENTIUMIII: set the enable bit in EVNTSEL0, then program EVNTSEL1
+ *    with the instructions-retired event plus the OS, USR and E flags.
+ *  - MPENTIUM4: program CRU_ESCR0 for instructions retired (non-bogus,
+ *    not tagged) and enable IQ_CCCR0 with that ESCR selected.  With
+ *    MPENTIUM4_HT, CRU_ESCR1 and IQ_CCCR2 are programmed the same way
+ *    using the T1 (second logical CPU) user/OS bits.
+ * A message is printed saying whether any counters were set up.
+ */
+static inline void smt_sched_setup(void)
+{
+    if (MPENTIUMIII) {
+        unsigned int evntsel, x;
+        
+        /* Make sure counters enabled: read-modify-write of EVNTSEL0 that
+         * only adds the enable bit (the high word read into x is unused). */
+        rdmsr(MSR_P6_EVNTSEL0, evntsel, x);
+        evntsel |= P6_EVNTSEL_EN;
+        wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+        
+        /* Counter 1 counts retired instructions. */
+        evntsel =
+            P6_PERF_INST_RETIRED | 
+            P6_EVNTSEL_OS        | 
+            P6_EVNTSEL_USR       | 
+            P6_EVNTSEL_E;
+        wrmsr(MSR_P6_EVNTSEL1, evntsel, 0);
+    }
+
+    if(MPENTIUM4) {
+        unsigned int x;
+        
+        /* Program the ESCR */
+        x = P4_ESCR_USR                                    |
+            P4_ESCR_OS                                     | 
+            P4_ESCR_EVNTSEL(P4_ESCR_EVNTSEL_INSTR_RETIRED) | 
+            P4_ESCR_EVNTMASK(P4_ESCR_EVNTMASK_IRET_NB_NTAG);
+        wrmsr(MSR_P4_CRU_ESCR0, x, 0);
+        
+        /* Program the CCCR.  P4_CCCR_ACTIVE_THREAD(3) and P4_CCCR_RESERVED
+         * both expand to 3 << 16, so the two branches write the same
+         * value. */
+        if (MPENTIUM4_HT) {
+            x = P4_CCCR_ENABLE                    | 
+                P4_CCCR_ESCR(P4_CRU_ESCR0_NUMBER) |
+                P4_CCCR_ACTIVE_THREAD(3);
+        }
+        else {
+            x = P4_CCCR_ENABLE                    | 
+                P4_CCCR_ESCR(P4_CRU_ESCR0_NUMBER) |
+                P4_CCCR_RESERVED;
+        }
+        wrmsr(MSR_P4_IQ_CCCR0, x, 0);
+
+        if (MPENTIUM4_HT) {
+
+            /* Program the second ESCR */
+            x = P4_ESCR_T1_USR                                 |
+                P4_ESCR_T1_OS                                  | 
+                P4_ESCR_EVNTSEL(P4_ESCR_EVNTSEL_INSTR_RETIRED) | 
+                P4_ESCR_EVNTMASK(P4_ESCR_EVNTMASK_IRET_NB_NTAG);
+            wrmsr(MSR_P4_CRU_ESCR1, x, 0);
+            
+            /* Program the second CCCR */
+            x = P4_CCCR_ENABLE                    |
+                P4_CCCR_ESCR(P4_CRU_ESCR1_NUMBER) |
+                P4_CCCR_ACTIVE_THREAD(3);
+            wrmsr(MSR_P4_IQ_CCCR2, x, 0);
+        }
+    }
+
+    if (!MPENTIUMIII && !MPENTIUM4) {
+        printk("WARNING: Not setting up IPC performance counters.\n");
+    } else {
+        printk("Setting up IPC performance counters.\n");
+    }
+}
+
+#ifdef CONFIG_MPENTIUMIII
+# define MY_MSR_COUNTER MSR_P6_PERFCTR1
+#endif
+#ifdef CONFIG_MPENTIUM4
+# define MY_MSR_COUNTER MSR_P4_IQ_COUNTER0
+#endif
+#ifndef MY_MSR_COUNTER
+# define MY_MSR_COUNTER 0 /* Never used but ensures compilation */
+#endif
+#define MY_MSR_COUNTER0 MSR_P4_IQ_COUNTER0
+#define MY_MSR_COUNTER1 MSR_P4_IQ_COUNTER2
+
+/* smt_sched_start_sample(task): record the current event-counter value and
+ * the current TSC value in the ipc_sample_start_* fields of task.
+ *
+ * With MPENTIUM4_HT the counter is chosen by the low bit of
+ * task->processor (MY_MSR_COUNTER1 when set, MY_MSR_COUNTER0 otherwise);
+ * without it MY_MSR_COUNTER is read.
+ *
+ * The argument is parenthesised and the locals carry an ipc_ prefix so
+ * that an expression argument, or an argument that happens to be called
+ * l, h or msr, still refers to the caller's object.
+ */
+# define smt_sched_start_sample(task)                                        \
+{                                                                            \
+    unsigned int ipc_lo, ipc_hi;                                             \
+                                                                             \
+    if (MPENTIUM4_HT) {                                                      \
+        unsigned int ipc_msr =                                               \
+            ((task)->processor & 1)?MY_MSR_COUNTER1:MY_MSR_COUNTER0;         \
+        rdmsr(ipc_msr, ipc_lo, ipc_hi);                                      \
+    }                                                                        \
+    else {                                                                   \
+        rdmsr(MY_MSR_COUNTER, ipc_lo, ipc_hi);                               \
+    }                                                                        \
+    (task)->ipc_sample_start_count_lo = ipc_lo;                              \
+    (task)->ipc_sample_start_count_hi = ipc_hi;                              \
+    rdtsc(ipc_lo, ipc_hi);                                                   \
+    (task)->ipc_sample_start_cycle_lo = ipc_lo;                              \
+    (task)->ipc_sample_start_cycle_hi = ipc_hi;                              \
+}
+
+/* smt_sched_stop_sample(task): finish a sample and update task->ipc_average.
+ *
+ * A sample counts as started when task->ipc_sample_start_cycle_hi is
+ * non-zero.  In that case the low words of the event counter and of the
+ * TSC are read again, their differences from the recorded start values are
+ * taken (32-bit unsigned arithmetic), and the ratio count/cycles is folded
+ * into task->ipc_average through IPC_AVERAGE() (defined elsewhere); the
+ * start marker is then cleared.  Otherwise ipc_average is set to 0.0.
+ *
+ * The argument is parenthesised and the locals carry an ipc_ prefix so
+ * that an expression argument, or an argument that happens to be called
+ * c, t, cl, ... still refers to the caller's object.
+ */
+# define smt_sched_stop_sample(task)                                         \
+{                                                                            \
+    if ((task)->ipc_sample_start_cycle_hi != 0)                              \
+    {                                                                        \
+        unsigned int ipc_cl, ipc_ch, ipc_tl, ipc_th;                         \
+        unsigned int ipc_c, ipc_t;                                           \
+                                                                             \
+        if (MPENTIUM4_HT) {                                                  \
+            unsigned int ipc_msr =                                           \
+                ((task)->processor & 1)?MY_MSR_COUNTER1:MY_MSR_COUNTER0;     \
+            rdmsr(ipc_msr, ipc_cl, ipc_ch);                                  \
+        }                                                                    \
+        else {                                                               \
+            rdmsr(MY_MSR_COUNTER, ipc_cl, ipc_ch);                           \
+        }                                                                    \
+                                                                             \
+        rdtsc(ipc_tl, ipc_th);                                               \
+                                                                             \
+        ipc_c = ipc_cl - (task)->ipc_sample_start_count_lo;                  \
+        ipc_t = ipc_tl - (task)->ipc_sample_start_cycle_lo;                  \
+        (task)->ipc_average = IPC_AVERAGE((task)->ipc_average,               \
+                                          ((double)ipc_c)/((double)ipc_t));  \
+        (task)->ipc_sample_start_cycle_hi = 0;                               \
+                                                                             \
+    }                                                                        \
+    else                                                                     \
+        (task)->ipc_average = 0.0;                                           \
+                                                                             \
+}
+
+//        task->ipc_sample_latest =                                            
+//            (unsigned int)(1000.0*((double)c)/((double)t));                  
+#endif /* __KERNEL__ */
+
+
+#endif /* P4PERF_H */
+
+/* End of $RCSfile$ */
diff --git a/tools/misc/xen_cpuperf.c b/tools/misc/xen_cpuperf.c
new file mode 100644 (file)
index 0000000..293997b
--- /dev/null
@@ -0,0 +1,265 @@
+/*
+ * User mode program to prod MSR values through /proc/perfcntr
+ *
+ *
+ * $Id$
+ *
+ * $Log$
+ */
+
+#include <sys/types.h>
+#include <sched.h>
+#include <error.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "p4perf.h"
+#include "hypervisor-ifs/dom0_ops.h"
+#include "dom0_defs.h"
+
+/*
+ * Submit a DOM0_MSR write request through do_dom0_op().
+ *
+ *   cpu_mask  stored in the request's cpu_mask field (main() builds it as
+ *             a bit per CPU)
+ *   msr       MSR address
+ *   low/high  stored in the request's in1/in2 fields
+ *
+ * The request is zeroed first so that the fields this function does not
+ * set are not passed on as uninitialised stack contents.
+ * NOTE(review): the result of do_dom0_op() is not examined here.
+ */
+void dom0_wrmsr( int cpu_mask, int msr, unsigned int low, unsigned int high )
+{
+  dom0_op_t op;
+
+  memset(&op, 0, sizeof(op));
+  op.cmd = DOM0_MSR;
+  op.u.msr.write = 1;
+  op.u.msr.msr = msr;
+  op.u.msr.cpu_mask = cpu_mask;
+  op.u.msr.in1 = low;
+  op.u.msr.in2 = high;
+  do_dom0_op(&op);
+}
+
+/*
+ * Submit a DOM0_MSR read request through do_dom0_op() and return the
+ * 64-bit value assembled from the request's out2 (upper 32 bits) and out1
+ * (lower 32 bits) fields after the call.
+ *
+ *   cpu_mask  stored in the request's cpu_mask field (main() builds it as
+ *             a bit per CPU)
+ *   msr       MSR address
+ *
+ * The request is zeroed first: in1/in2 are not used for a read, and the
+ * out fields then hold 0 rather than stack garbage if the call leaves them
+ * untouched.
+ * NOTE(review): the result of do_dom0_op() is not examined here.
+ */
+unsigned long long dom0_rdmsr( int cpu_mask, int msr )
+{
+  dom0_op_t op;
+
+  memset(&op, 0, sizeof(op));
+  op.cmd = DOM0_MSR;
+  op.u.msr.write = 0;
+  op.u.msr.msr = msr;
+  op.u.msr.cpu_mask = cpu_mask;
+  do_dom0_op(&op);
+  return (((unsigned long long)op.u.msr.out2)<<32) | op.u.msr.out1 ;
+}
+
+/* One named MSR that can be selected on the command line (-E / -C). */
+struct macros {
+    char         *name;      /* name compared by lookup_macro() */
+    unsigned long msr_addr;  /* MSR address that main() writes to */
+    int           number;    /* for an ESCR entry: value main() passes to
+                              * P4_CCCR_ESCR(); for other entries only
+                              * shown in the debug output */
+};
+
+/* Table of the MSR names accepted by -E and -C, searched linearly by
+ * lookup_macro() and terminated by an entry whose name is NULL.
+ * Each entry is {name, MSR address, number}; addresses and numbers repeat
+ * the MSR_P4_* and P4_*_NUMBER constants of p4perf.h as literals.
+ * NOTE(review): SSU_ESCR1 (0x3bf) has no MSR_P4_SSU_ESCR1 counterpart in
+ * p4perf.h -- confirm that this register exists.
+ */
+struct macros msr[] = {
+    {"BPU_COUNTER0", 0x300, 0},
+    {"BPU_COUNTER1", 0x301, 1},
+    {"BPU_COUNTER2", 0x302, 2},
+    {"BPU_COUNTER3", 0x303, 3},
+    {"MS_COUNTER0", 0x304, 4},
+    {"MS_COUNTER1", 0x305, 5},
+    {"MS_COUNTER2", 0x306, 6},
+    {"MS_COUNTER3", 0x307, 7},
+    {"FLAME_COUNTER0", 0x308, 8},
+    {"FLAME_COUNTER1", 0x309, 9},
+    {"FLAME_COUNTER2", 0x30a, 10},
+    {"FLAME_COUNTER3", 0x30b, 11},
+    {"IQ_COUNTER0", 0x30c, 12},
+    {"IQ_COUNTER1", 0x30d, 13},
+    {"IQ_COUNTER2", 0x30e, 14},
+    {"IQ_COUNTER3", 0x30f, 15},
+    {"IQ_COUNTER4", 0x310, 16},
+    {"IQ_COUNTER5", 0x311, 17},
+    {"BPU_CCCR0", 0x360, 0},
+    {"BPU_CCCR1", 0x361, 1},
+    {"BPU_CCCR2", 0x362, 2},
+    {"BPU_CCCR3", 0x363, 3},
+    {"MS_CCCR0", 0x364, 4},
+    {"MS_CCCR1", 0x365, 5},
+    {"MS_CCCR2", 0x366, 6},
+    {"MS_CCCR3", 0x367, 7},
+    {"FLAME_CCCR0", 0x368, 8},
+    {"FLAME_CCCR1", 0x369, 9},
+    {"FLAME_CCCR2", 0x36a, 10},
+    {"FLAME_CCCR3", 0x36b, 11},
+    {"IQ_CCCR0", 0x36c, 12},
+    {"IQ_CCCR1", 0x36d, 13},
+    {"IQ_CCCR2", 0x36e, 14},
+    {"IQ_CCCR3", 0x36f, 15},
+    {"IQ_CCCR4", 0x370, 16},
+    {"IQ_CCCR5", 0x371, 17},
+    {"BSU_ESCR0", 0x3a0, 7},
+    {"BSU_ESCR1", 0x3a1, 7},
+    {"FSB_ESCR0", 0x3a2, 6},
+    {"FSB_ESCR1", 0x3a3, 6},
+    {"MOB_ESCR0", 0x3aa, 2},
+    {"MOB_ESCR1", 0x3ab, 2},
+    {"PMH_ESCR0", 0x3ac, 4},
+    {"PMH_ESCR1", 0x3ad, 4},
+    {"BPU_ESCR0", 0x3b2, 0},
+    {"BPU_ESCR1", 0x3b3, 0},
+    {"IS_ESCR0", 0x3b4, 1},
+    {"IS_ESCR1", 0x3b5, 1},
+    {"ITLB_ESCR0", 0x3b6, 3},
+    {"ITLB_ESCR1", 0x3b7, 3},
+    {"IX_ESCR0", 0x3c8, 5},
+    {"IX_ESCR1", 0x3c9, 5},
+    {"MS_ESCR0", 0x3c0, 0},
+    {"MS_ESCR1", 0x3c1, 0},
+    {"TBPU_ESCR0", 0x3c2, 2},
+    {"TBPU_ESCR1", 0x3c3, 2},
+    {"TC_ESCR0", 0x3c4, 1},
+    {"TC_ESCR1", 0x3c5, 1},
+    {"FIRM_ESCR0", 0x3a4, 1},
+    {"FIRM_ESCR1", 0x3a5, 1},
+    {"FLAME_ESCR0", 0x3a6, 0},
+    {"FLAME_ESCR1", 0x3a7, 0},
+    {"DAC_ESCR0", 0x3a8, 5},
+    {"DAC_ESCR1", 0x3a9, 5},
+    {"SAAT_ESCR0", 0x3ae, 2},
+    {"SAAT_ESCR1", 0x3af, 2},
+    {"U2L_ESCR0", 0x3b0, 3},
+    {"U2L_ESCR1", 0x3b1, 3},
+    {"CRU_ESCR0", 0x3b8, 4},
+    {"CRU_ESCR1", 0x3b9, 4},
+    {"CRU_ESCR2", 0x3cc, 5},
+    {"CRU_ESCR3", 0x3cd, 5},
+    {"CRU_ESCR4", 0x3e0, 6},
+    {"CRU_ESCR5", 0x3e1, 6},
+    {"IQ_ESCR0", 0x3ba, 0},
+    {"IQ_ESCR1", 0x3bb, 0},
+    {"RAT_ESCR0", 0x3bc, 2},
+    {"RAT_ESCR1", 0x3bd, 2},
+    {"SSU_ESCR0", 0x3be, 3},
+    {"SSU_ESCR1", 0x3bf, 3},
+    {"ALF_ESCR0", 0x3ca, 1},
+    {"ALF_ESCR1", 0x3cb, 1},
+    {"PEBS_ENABLE", 0x3f1, 0},
+    {"PEBS_MATRIX_VERT", 0x3f2, 0},
+    {NULL, 0, 0}
+};
+
+/*
+ * Search the msr[] table for an entry whose name equals str exactly.
+ * Returns a pointer to that entry, or NULL when no name matches.
+ */
+struct macros *lookup_macro(char *str)
+{
+    struct macros *entry;
+
+    /* The table ends at the first entry with a NULL name. */
+    for (entry = msr; entry->name != NULL; entry++) {
+        if (!strcmp(entry->name, str))
+            return entry;
+    }
+
+    return NULL;
+}
+
+/* Print a short option summary to stderr. */
+static void cpuperf_usage(const char *prog)
+{
+    fprintf(stderr,
+            "Usage: %s [-d] [-c cpu] [-t threads] [-e eventsel] [-m eventmask]\n"
+            "          [-T tag] [-P bit] [-V bit] -E escr -C cccr\n"
+            "       %s [-c cpu] -r\n",
+            prog, prog);
+}
+
+/* Convert a numeric option argument to int.  Plain input goes through
+ * atoi() exactly as before; a leading "0x"/"0X" additionally selects
+ * hexadecimal (such input used to be read as 0).
+ */
+static int cpuperf_num(const char *arg)
+{
+    if (arg[0] == '0' && (arg[1] == 'x' || arg[1] == 'X'))
+        return (int)strtol(arg, NULL, 16);
+    return atoi(arg);
+}
+
+/* Turn a bit-number argument of option -opt into a single-bit mask.
+ * Exits with an error for numbers outside 0-31, for which the shift
+ * would be undefined.
+ */
+static unsigned long cpuperf_bit(const char *arg, int opt)
+{
+    int bit = cpuperf_num(arg);
+
+    if (bit < 0 || bit > 31) {
+        fprintf(stderr, "Bit number '%s' for -%c out of range (0-31).\n",
+                arg, opt);
+        exit(1);
+    }
+    return 1UL << bit;
+}
+
+/*
+ * Command-line front end.
+ *
+ *   -r         print counters 0x300-0x311 of every CPU selected by -c,
+ *              one line per CPU, then exit
+ *   -E name    ESCR to program (required unless -r)
+ *   -C name    CCCR to program (required unless -r)
+ *   -c cpu     CPU number, or -1 for all; default CPU 0
+ *   -t bits    ESCR thread bits (default T0_USR | T0_OS)
+ *   -e / -m    event select / event mask
+ *   -T tag     tag value; also sets the tag-enable bit
+ *   -P / -V    set a bit in PEBS_ENABLE / PEBS_MATRIX_VERT (may repeat)
+ *   -d         print the values to stderr before writing them
+ *
+ * Returns 0 on success; exits with 1 on a usage error.
+ */
+int main(int argc, char **argv)
+{
+    int c;
+    int t = P4_ESCR_T0_USR | P4_ESCR_T0_OS;   /* 0xc */
+    int es = 0, em = 0, tv = 0, te = 0;
+    unsigned int cpu_mask = 1;
+    struct macros *escr = NULL, *cccr = NULL;
+    unsigned long escr_val, cccr_val;
+    int debug = 0;
+    unsigned long pebs = 0, pebs_vert = 0;
+    int pebs_x = 0, pebs_vert_x = 0;
+    int read_mode = 0;   /* not called "read": that would shadow read(2) */
+
+    while ((c = getopt(argc, argv, "dc:t:e:m:T:E:C:P:V:r")) != -1) {
+        switch (c) {
+        case 'P':
+            pebs |= cpuperf_bit(optarg, c);
+            pebs_x = 1;
+            break;
+        case 'V':
+            pebs_vert |= cpuperf_bit(optarg, c);
+            pebs_vert_x = 1;
+            break;
+        case 'd':
+            debug = 1;
+            break;
+        case 'c':
+        {
+            int cpu = cpuperf_num(optarg);
+
+            if (cpu == -1) {
+                cpu_mask = ~0U;
+            } else if (cpu >= 0 && cpu < 32) {
+                cpu_mask = 1U << cpu;
+            } else {
+                fprintf(stderr, "CPU '%s' out of range (-1 to 31).\n",
+                        optarg);
+                exit(1);
+            }
+            break;
+        }
+        case 't': // ESCR thread bits
+            t = cpuperf_num(optarg);
+            break;
+        case 'e': // eventsel
+            es = cpuperf_num(optarg);
+            break;
+        case 'm': // eventmask
+            em = cpuperf_num(optarg);
+            break;
+        case 'T': // tag value
+            tv = cpuperf_num(optarg);
+            te = 1;
+            break;
+        case 'E':
+            escr = lookup_macro(optarg);
+            if (!escr) {
+                fprintf(stderr, "Macro '%s' not found.\n", optarg);
+                exit(1);
+            }
+            break;
+        case 'C':
+            cccr = lookup_macro(optarg);
+            if (!cccr) {
+                fprintf(stderr, "Macro '%s' not found.\n", optarg);
+                exit(1);
+            }
+            break;
+        case 'r':
+            read_mode = 1;
+            break;
+        default:
+            /* getopt() has already reported the offending option. */
+            cpuperf_usage(argv[0]);
+            exit(1);
+        }
+    }
+
+    if (read_mode) {
+        int cpu, reg;
+
+        /* One output line per selected CPU, each read with a mask naming
+         * just that CPU.
+         * NOTE(review): with -c -1 all 32 mask bits are tried, as there is
+         * no CPU count available here. */
+        for (cpu = 0; cpu < 32; cpu++) {
+            if (!(cpu_mask & (1U << cpu)))
+                continue;
+            for (reg = MSR_P4_BPU_COUNTER0; reg <= MSR_P4_IQ_COUNTER5; reg++)
+                printf("%010llx ", dom0_rdmsr(1U << cpu, reg));
+            printf("\n");
+        }
+        exit(0);
+    }
+
+    if (!escr) {
+        fprintf(stderr, "Need an ESCR.\n");
+        exit(1);
+    }
+    if (!cccr) {
+        fprintf(stderr, "Need a counter number.\n");
+        exit(1);
+    }
+
+    escr_val = P4_ESCR_THREADS(t) | P4_ESCR_EVNTSEL(es) |
+        P4_ESCR_EVNTMASK(em) | P4_ESCR_TV(tv) | ((te)?P4_ESCR_TE:0);
+    cccr_val = P4_CCCR_ENABLE | P4_CCCR_ESCR(escr->number) |
+        P4_CCCR_ACTIVE_THREAD(3)/*reserved*/;
+
+    if (debug) {
+        fprintf(stderr, "ESCR 0x%lx <= 0x%08lx\n", escr->msr_addr, escr_val);
+        fprintf(stderr, "CCCR 0x%lx <= 0x%08lx (%d)\n",
+                cccr->msr_addr, cccr_val, cccr->number);
+        if (pebs_x)
+            fprintf(stderr, "PEBS 0x%x <= 0x%08lx\n",
+                    MSR_P4_PEBS_ENABLE, pebs);
+        if (pebs_vert_x)
+            fprintf(stderr, "PMV  0x%x <= 0x%08lx\n",
+                    MSR_P4_PEBS_MATRIX_VERT, pebs_vert);
+    }
+
+    /* The ESCR is written before the CCCR that enables the counter. */
+    dom0_wrmsr( cpu_mask, escr->msr_addr, escr_val, 0 );
+    dom0_wrmsr( cpu_mask, cccr->msr_addr, cccr_val, 0 );
+
+    if (pebs_x)
+        dom0_wrmsr( cpu_mask, MSR_P4_PEBS_ENABLE, pebs, 0 );
+
+    if (pebs_vert_x)
+        dom0_wrmsr( cpu_mask, MSR_P4_PEBS_MATRIX_VERT, pebs_vert, 0 );
+
+    return 0;
+}
+